{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Using server-side cursors with PostgreSQL and MySQL "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# see http://pynash.org/2013/03/06/timing-and-profiling.html for setup of profiling magics"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import sys\n",
      "sys.path.insert(0, '../src')\n",
      "import petl; print petl.VERSION\n",
      "from petl.fluent import etl\n",
      "import psycopg2\n",
      "import MySQLdb"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "0.18-SNAPSHOT\n"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tbl_dummy_data = etl().dummytable(100000)\n",
      "tbl_dummy_data.look()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "+-------+-----------+----------------------+\n",
        "| 'foo' | 'bar'     | 'baz'                |\n",
        "+=======+===========+======================+\n",
        "|    31 | 'pears'   |  0.12509339133627373 |\n",
        "+-------+-----------+----------------------+\n",
        "|    90 | 'oranges' |  0.05715662664829624 |\n",
        "+-------+-----------+----------------------+\n",
        "|    12 | 'oranges' |   0.8525934855236975 |\n",
        "+-------+-----------+----------------------+\n",
        "|    68 | 'apples'  |    0.911131148945329 |\n",
        "+-------+-----------+----------------------+\n",
        "|    77 | 'apples'  |   0.8115001426786242 |\n",
        "+-------+-----------+----------------------+\n",
        "|    94 | 'apples'  |   0.6671472950408706 |\n",
        "+-------+-----------+----------------------+\n",
        "|    55 | 'apples'  | 0.003432210002982883 |\n",
        "+-------+-----------+----------------------+\n",
        "|    50 | 'apples'  |   0.7744929413714756 |\n",
        "+-------+-----------+----------------------+\n",
        "|    82 | 'oranges' |  0.46001316056152297 |\n",
        "+-------+-----------+----------------------+\n",
        "|    13 | 'bananas' |   0.9602502583307483 |\n",
        "+-------+-----------+----------------------+\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%memit print tbl_dummy_data.nrows()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "100000\n",
        "peak memory: 51.38 MiB, increment: 0.20 MiB\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "PostgreSQL"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "psql_connection = psycopg2.connect(host='localhost', dbname='petl', user='petl', password='petl')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor = psql_connection.cursor()\n",
      "cursor.execute('DROP TABLE IF EXISTS issue_219;')\n",
      "cursor.execute('CREATE TABLE issue_219 (foo INTEGER, bar TEXT, baz FLOAT);')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%memit -r1 tbl_dummy_data.progress(10000).todb(psql_connection, 'issue_219')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "10000 rows in 3.61s (2770 row/s); batch in 3.61s (2770 row/s)\n",
        "20000 rows in 7.21s (2774 row/s); batch in 3.60s (2778 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "30000 rows in 10.96s (2736 row/s); batch in 3.75s (2663 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "40000 rows in 14.69s (2723 row/s); batch in 3.72s (2685 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "50000 rows in 18.32s (2728 row/s); batch in 3.64s (2748 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "60000 rows in 22.04s (2722 row/s); batch in 3.72s (2689 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "70000 rows in 25.76s (2717 row/s); batch in 3.72s (2687 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "80000 rows in 29.58s (2704 row/s); batch in 3.82s (2617 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "90000 rows in 33.41s (2693 row/s); batch in 3.83s (2613 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "100000 rows in 37.14s (2692 row/s); batch in 3.73s (2680 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "peak memory: 53.32 MiB, increment: 0.01 MiB\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "100000 rows in 37.14s (2692 row/s)\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# memory usage using default cursor\n",
      "%memit print etl.fromdb(psql_connection, 'select * from issue_219 order by foo').look(2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "+-------+-----------+-------------------+\n",
        "| 'foo' | 'bar'     | 'baz'             |\n",
        "+=======+===========+===================+\n",
        "|     0 | 'pears'   | 0.625346298507174 |\n",
        "+-------+-----------+-------------------+\n",
        "|     0 | 'bananas' | 0.191535466509102 |\n",
        "+-------+-----------+-------------------+\n",
        "\n",
        "peak memory: 60.89 MiB, increment: 7.32 MiB\n"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# memory usage using server-side cursor\n",
      "%memit print etl.fromdb(lambda: psql_connection.cursor(name='server-side'), 'select * from issue_219 order by foo').look(2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "+-------+-----------+-------------------+\n",
        "| 'foo' | 'bar'     | 'baz'             |\n",
        "+=======+===========+===================+\n",
        "|     0 | 'pears'   | 0.625346298507174 |\n",
        "+-------+-----------+-------------------+\n",
        "|     0 | 'bananas' | 0.191535466509102 |\n",
        "+-------+-----------+-------------------+\n",
        "\n",
        "peak memory: 54.38 MiB, increment: 0.00 MiB\n"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "MySQL"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "mysql_connection = MySQLdb.connect(host='127.0.0.1', db='petl', user='petl', passwd='petl')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor = mysql_connection.cursor()\n",
      "cursor.execute('SET SQL_MODE=ANSI_QUOTES')\n",
      "cursor.execute('DROP TABLE IF EXISTS issue_219;')\n",
      "cursor.execute('CREATE TABLE issue_219 (foo INTEGER, bar TEXT, baz FLOAT);')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 11,
       "text": [
        "0L"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%memit -r1 tbl_dummy_data.progress(10000).todb(mysql_connection, 'issue_219')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "10000 rows in 2.96s (3373 row/s); batch in 2.96s (3373 row/s)\n",
        "20000 rows in 6.22s (3214 row/s); batch in 3.26s (3069 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "30000 rows in 9.45s (3174 row/s); batch in 3.23s (3097 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "40000 rows in 12.50s (3199 row/s); batch in 3.05s (3278 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "50000 rows in 15.60s (3205 row/s); batch in 3.10s (3229 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "60000 rows in 18.65s (3217 row/s); batch in 3.05s (3280 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "70000 rows in 21.90s (3196 row/s); batch in 3.25s (3072 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "80000 rows in 24.88s (3215 row/s); batch in 2.98s (3353 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "90000 rows in 28.08s (3205 row/s); batch in 3.19s (3130 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "100000 rows in 31.24s (3200 row/s); batch in 3.17s (3158 row/s)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "peak memory: 54.77 MiB, increment: 0.01 MiB\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "\n",
        "100000 rows in 31.24s (3200 row/s)\n"
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# memory usage with default cursor\n",
      "%memit print etl.fromdb(mysql_connection, 'select * from issue_219 order by foo').look(2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "+-------+-----------+-----------+\n",
        "| 'foo' | 'bar'     | 'baz'     |\n",
        "+=======+===========+===========+\n",
        "|    0L | 'bananas' |  0.191535 |\n",
        "+-------+-----------+-----------+\n",
        "|    0L | 'bananas' | 0.0228774 |\n",
        "+-------+-----------+-----------+\n",
        "\n",
        "peak memory: 79.88 MiB, increment: 25.11 MiB\n"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# memory usage with server-side cursor\n",
      "%memit print etl.fromdb(lambda: mysql_connection.cursor(MySQLdb.cursors.SSCursor), 'select * from issue_219 order by foo').look(2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "+-------+-----------+-----------+\n",
        "| 'foo' | 'bar'     | 'baz'     |\n",
        "+=======+===========+===========+\n",
        "|    0L | 'bananas' |  0.191535 |\n",
        "+-------+-----------+-----------+\n",
        "|    0L | 'bananas' | 0.0228774 |\n",
        "+-------+-----------+-----------+\n",
        "\n",
        "peak memory: 80.00 MiB, increment: 0.09 MiB\n"
       ]
      }
     ],
     "prompt_number": 15
    }
   ],
   "metadata": {}
  }
 ]
}